from IPython.display import Image
Image(filename='../imgs/banner.png')
%load_ext pretty_jupyter

Authors:

  • Bailey Passmore, Data Scientist, HRDAG
  • Larry Barrett, Consultant, HRDAG
import pendulum

Monday, 17 March 2025 at 11:47 AM (PDT)

%%html

<style>
    #Styling {
        font-weight: bold;
        font-family: Helvetica;
    }
</style>

Goal

  • What we have in dev-pre-restructure is working fine but we're on a short deadline and need to streamline the data. Let's make a barebones table of the core data we need for analysis, including:
    • event_no
    • date_occurred (w/ year_occurred)
    • event_type (a field derived from the initial event type in the data, either 911 call reporting gunfire or Shotspotter GDT alert)
    • event_location (service address if specified)
    • date_dispatched (is this the same as date of arrival?)
    • area (the result of mapping the reported police district to the police area)

Research questions we're working towards

  1. Is dispatch reported at the same rate for all districts?
  2. RE: SoundThinking / Brookings Institution claim that some 80% of gunfire events do not get reported by citizens - is that true in Chicago?
    • When SST alerts aren't matched to 911 calls, what is the typical disposition of such an alert?
    • When 911 calls aren't matched to SST alert, what is the typical disposition?

Time period covered

  • Earliest date occurred included: '2021-01-01'
  • Last date occurred included: '2024-09-23' (Per CPD, "ShotSpotter technology for the City of Chicago was discontinued in September 2024, meaning no records are available beyond that date.") The last date a ShotSpotter alert appears in the data is 23 September 2024, so we include records up to and including this date.

Identifying Event type (based on the _inittype field)

# Initial event-type codes (init_type) that mark a record as a ShotSpotter alert.
shotspotter = ['SST', 'PSST', 'MSST'] # keywords provided by CPD in Info sheet
# Initial event-type codes (init_type) that mark a record as a human report of gunfire.
citizencalls = ['SHOTS', 'SHOTSF', 'PERSHO',] # Note: 'PERGUN', 'PERDOW','PERHLP', 'DOMBAT', etc. excluded

Note about what humans can do that SST can't

In the OEMC data, in addition to reports originating as calls about shots fired ('SHOTSF') or persons being shot ('PERSHO'), we also see events with an initial label 'PERGUN' and final label referring to shots fired or someone shot. This tells us that not only do Chicagoans report gunfire in general, they may also report early warning signs of conflict involving firearms before any shots occur, giving first responders a head start to arrive on scene and provide potentially life-saving intervention.

# dependencies
import re
import numpy as np
from datetime import timedelta
import pandas as pd
from scipy.stats import power_divergence
# support methods
def format_count(v):
    """Return ``v`` as a string with comma thousands separators."""
    return f"{v:,}"


def format_longfloat(prop):
    """Return ``prop`` as a float rounded to three decimal places."""
    three_decimals = format(prop, '.3f')
    return float(three_decimals)


def format_prop(prop, decn=1, asperc=True):
    """Format a proportion as a percent string such as ``'59.3%'``.

    Args:
        prop: the value to format.
        decn: number of decimal places passed to ``round``.
        asperc: when true, ``prop`` is multiplied by 100 before rounding;
            when false, it is rounded as given.

    Returns:
        The rounded value followed by a ``%`` sign.
    """
    scaled = prop * 100 if asperc else prop
    return f"{round(scaled, decn)}%"


def report_fields(df, idcol, cols, fillna=False, headn=10):
    """Build a frequency table of the value combinations found in ``cols``.

    Rows are first de-duplicated on ``idcol`` plus ``cols``, so each distinct
    (id, values) pair is counted once.

    Args:
        df: source DataFrame.
        idcol: name of the identifier column.
        cols: list of column names whose value combinations are counted.
        fillna: when true, missing values are replaced with the label
            'None reported' before counting; otherwise ``value_counts``
            is called with its defaults.
        headn: number of rows of the table to return.

    Returns:
        DataFrame with the columns in ``cols`` plus a formatted ``count``
        and a formatted ``percent`` column, limited to ``headn`` rows.
    """
    unique_rows = df[[idcol] + cols].drop_duplicates()
    values = unique_rows[cols]
    if fillna:
        values = values.fillna('None reported')

    count = values.value_counts().to_frame().reset_index()
    perc = values.value_counts(normalize=True).to_frame().reset_index()
    perc = perc.rename(columns={'proportion': 'percent'})

    # turn the raw numbers into display strings before joining the two tables
    count['count'] = count['count'].apply(format_count)
    perc['percent'] = perc.percent.apply(format_prop)
    return pd.merge(count, perc, on=cols).head(headn)


def report_overtime(df, idcol, yearcol):
    """Count the distinct ``idcol`` values observed for each ``yearcol`` value.

    Returns:
        DataFrame with ``yearcol`` converted to strings and one row per
        year, sorted by that string value.
    """
    per_year = df[[idcol, yearcol]].groupby(yearcol).nunique()
    per_year = per_year.reset_index()
    per_year[yearcol] = per_year[yearcol].astype(str)
    return per_year.sort_values(yearcol)


def checkgroup(df):
    """Report whether a group has more events without dispatch than with it.

    Args:
        df: DataFrame with a boolean ``dispatch_reported`` column and a
            ``count`` column holding counts formatted with comma
            thousands separators (e.g. ``'19,938'``). Only the first
            matching row on each side is read.

    Returns:
        True when the count for ``dispatch_reported == False`` is greater
        than the count for ``dispatch_reported == True``. A side with no
        rows in the group is treated as a count of 0 instead of raising.
    """
    def first_count(mask):
        # an empty selection means that outcome was never observed for the group
        matches = df.loc[mask, 'count'].values
        if len(matches) == 0:
            return 0
        return int(str(matches[0]).replace(',', ''))

    reporttrue = first_count(df.dispatch_reported)
    reportfalse = first_count(~df.dispatch_reported)
    return reportfalse > reporttrue


def lookup_cramer_cat(msrmt, df):
    """Label an effect-size measurement as 'small', 'medium', or 'large'.

    Args:
        msrmt: the effect-size measurement.
        df: degrees of freedom; cutoffs are defined for 1 through 5.

    Returns:
        'small' when ``msrmt`` is at or below the first cutoff for ``df``,
        'medium' when at or below the second, 'large' otherwise.
        None when ``df`` has no cutoffs defined.
    """
    # degrees of freedom -> (largest 'small' value, largest 'medium' value)
    cutoffs = {
        1: (0.1, 0.3),
        2: (0.07, 0.21),
        3: (0.06, 0.17),
        4: (0.05, 0.15),
        5: (0.04, 0.13),
    }
    bounds = cutoffs.get(df)
    if bounds is None:
        return None
    small_max, medium_max = bounds
    if msrmt <= small_max:
        return 'small'
    if msrmt <= medium_max:
        return 'medium'
    return 'large'


def cramers(stat, n, df):
    """Compute an effect size from a test statistic and categorize it.

    The effect size is ``sqrt(stat / (n * df))``.

    Args:
        stat: the test statistic.
        n: the sample size.
        df: degrees of freedom, also used to pick the category cutoffs.

    Returns:
        Tuple of (effect size rounded to three decimals, category label
        from ``lookup_cramer_cat``).
    """
    effect = np.sqrt(stat / (n * df))
    category = lookup_cramer_cat(msrmt=effect, df=df)
    rounded_effect = format_longfloat(effect)
    return rounded_effect, category


def text_statistic(powerdiv, sig, n, degf, null_phrase, exp=None):
    """Describe the outcome of a hypothesis test in plain language.

    Args:
        powerdiv: test result exposing ``statistic`` and ``pvalue``
            attributes.
        sig: significance threshold the p-value is compared against.
        n: sample size; used for the effect size and for the small-sample
            warning (added when ``n < 30``).
        degf: degrees of freedom handed to ``cramers`` for the effect size.
        null_phrase: wording of the null hypothesis, inserted after
            "the null hypothesis that".
        exp: accepted for backward compatibility; it does not change the
            generated text.

    Returns:
        A sentence (or several) reporting the p-value, whether the null
        hypothesis is rejected, the effect-size category when it is, and a
        note when the sample is small.
    """
    rounded_pval = format_longfloat(powerdiv.pvalue)
    # The p-value is rounded to three decimals, so a displayed zero only
    # establishes that it is below 0.001.
    if rounded_pval == 0:
        rounded_pval = "< 0.001"
    newinfo = f"This test results in a p-value of {rounded_pval}, "
    if powerdiv.pvalue < sig:
        _, effect_cat = cramers(powerdiv.statistic, n, degf)
        newinfo += f"which is a statistically significant difference and rejects the null hypothesis that {null_phrase}. The test indicates a {effect_cat} effect. "
    else:
        newinfo += f"which is not statistically significant and fails to reject the null hypothesis that {null_phrase}. "
    if n < 30:
        newinfo += f"However, the sample size of {n} is small."
    return newinfo
# main
# Columns loaded for the analysis, in presentation order.
# location_x and location_y are currently not loaded.
colorder = [
    # identifiers, timing, and place
    'event_no', 'date_occurred', 'date_dispatched', 'area', 'location',
    # event typing and disposition
    'init_type', 'event_type_init', 'fin_type', 'event_type_fin',
    'disposition', 'event_type',
    # derived fields
    'shotspotter_alert', 'human_caller', 'year_occurred',
    'dispatch_reported', 'labeled_duplicate', 'shotspotter_first',
    'disposition_reported', 'misc_event', 'early_warning',
    # record-linkage fields
    'any_coref', 'cluster', 'n_events',
]
data = pd.read_parquet("../../indicate/output/events.parquet")
data = data[colorder]
# every row must be its own event: the analysis counts rows as events
assert data.shape[0] == data.event_no.nunique()
# split by the kind of source that reported the event
sst = data[data.event_type == 'ShotSpotter alert']
calls = data[data.event_type == 'Human reporting gunfire']

Review data

Sample emergency event record.
155064
event_no 2216216847
date_occurred 2022-06-11 22:08:19
date_dispatched NaT
area 1
location 11XX W GARFIELD BL
init_type PERSHO
event_type_init PERSON SHOT
fin_type PERSHO
event_type_fin PERSON SHOT
disposition None
event_type Human reporting gunfire
shotspotter_alert False
human_caller True
year_occurred 2022
dispatch_reported False
labeled_duplicate None
shotspotter_first False
disposition_reported False
misc_event None
early_warning False
any_coref False
cluster 2216216847
n_events 1

Counts

Overall

  • There are 385,448 gunfire-related 911 calls and ShotSpotter alerts prepared for this analysis.
  • The data cover a time period between 2021-01-01 and 2024-09-22.

Source types

  • Frequency table:
event_type count percent
0 Human reporting gunfire 228,552 59.3%
1 ShotSpotter alert 156,896 40.7%
  • Summary: Of the 385,448 emergency events included in the analysis,
    • 228,552 or 59.3% were generated by a 911 call, and
    • 156,896 or 40.7% were generated by a ShotSpotter alert.

Initial Event types

Presented are:

  • the initial event type as reported by OEMC and CPD (init_type),
  • the description of the initial type as found in the data (event_type_init), and
  • the type of source which reported the event (event_type).
init_type event_type_init event_type count percent
0 SHOTSF SHOTS FIRED Human reporting gunfire 183,961 47.7%
1 SST SHOT SPOTTER ShotSpotter alert 113,790 29.5%
2 MSST Multiple Shot - ShotSpotter ShotSpotter alert 38,855 10.1%
3 PERSHO PERSON SHOT Human reporting gunfire 31,567 8.2%
4 SHOTS SHOTS FIRED (OV) Human reporting gunfire 13,024 3.4%
5 PSST Probable Shot - ShotSpotter ShotSpotter alert 4,251 1.1%

# distinct events per (year, source type)
source_yearly = (
    data.groupby(['year_occurred', 'event_type'])['event_no']
    .nunique()
    .reset_index()
)
# one row per year, one column per source type
src_ylr_piv = source_yearly.pivot_table(
    values="event_no",
    index="year_occurred",
    columns="event_type",
    aggfunc="mean",
)
src_ylr_piv.plot.bar(
    title='Emergency Events Observed Over Time',
    xlabel='Year Occurred',
    ylabel='Record count',
)
<Axes: title={'center': 'Emergency Events Observed Over Time'}, xlabel='Year Occurred', ylabel='Record count'>

Police Areas

Image(filename="../imgs/2019-CPD-Area-Boundaries.webp")
<IPython.core.display.Image object>

Referring to reporting and CPD public data, we map each police district observed in the data to the corresponding police area.

  • Area 1, now called Area Central, will include the 2nd, 3rd, 7th, 8th, and 9th districts on the South Side.
  • Area 2, now called Area South, will include the 4th, 5th, 6th, and 22nd districts on the Far South Side.
  • Area 3, now called Area North, will include the 1st, 12th, 18th, 19th, 20th, and 24th districts on the North Side, largely along the lakefront.
  • Area 4 will include the 10th, 11th, and 15th districts on the West Side.
  • Area 5 will include the 14th, 16th, 17th, and 25th districts on the Northwest Side.

Events by police area

  • The data refer to the police district associated with the call, but we map these to the reported corresponding police area to simplify analyses.
  • Presented are the emergency event counts by police area.
area count percent
0 1 131,142 34.5%
1 2 104,839 27.6%
2 4 69,066 18.2%
3 5 39,722 10.4%
4 3 35,361 9.3%

Source type by area

  • Presented are the record counts by source type and police area.
area event_type count percent
0 1 Human reporting gunfire 68,615 18.1%
1 1 ShotSpotter alert 62,527 16.4%
2 2 Human reporting gunfire 53,770 14.1%
3 2 ShotSpotter alert 51,069 13.4%
4 3 Human reporting gunfire 34,925 9.2%
5 3 ShotSpotter alert 436 0.1%
6 4 Human reporting gunfire 37,357 9.8%
7 4 ShotSpotter alert 31,709 8.3%
8 5 Human reporting gunfire 28,663 7.5%
9 5 ShotSpotter alert 11,059 2.9%

Area 1

  • Presented are the record counts for Area 1, the area with the plurality of events, by source type and year occurred.
event_type year_occurred count percent
0 Human reporting gunfire 2021 20,928 16.0%
1 ShotSpotter alert 2021 16,985 13.0%
2 Human reporting gunfire 2022 19,325 14.7%
3 ShotSpotter alert 2022 15,265 11.6%
4 ShotSpotter alert 2023 18,641 14.2%
5 Human reporting gunfire 2023 17,089 13.0%
6 ShotSpotter alert 2024 11,636 8.9%
7 Human reporting gunfire 2024 11,273 8.6%

Research questions

%%jmd

Is dispatch reported at the same rate for all police areas?

Dispatch Date reporting

  • Presented are the counts of observed non-missing "Dispatch Date" values.
dispatch_reported count percent
0 True 231,626 60.1%
1 False 153,822 39.9%
# event counts for every (dispatch_reported, area) pair, ordered by area
disp_byarea = report_fields(
    df=data, idcol='event_no', cols=['dispatch_reported', 'area'], headn=10)
disp_byarea = disp_byarea.sort_values(
    ['area', 'dispatch_reported']).reset_index(drop=True)
# flag each area where undispatched events outnumber dispatched ones
more_unreported = (
    disp_byarea.groupby('area')[['dispatch_reported', 'count']]
    .apply(checkgroup)
    .reset_index()
    .rename(columns={0: 'more_unreported'})
)
more_unreported = disp_byarea.merge(more_unreported, on='area')

Dispatch Date reporting by area

  • Presented are the counts of observed non-missing "Dispatch Date" values by police area.
dispatch_reported area count percent
0 False 1 49,252 13.0%
1 True 1 81,890 21.5%
2 False 2 39,330 10.3%
3 True 2 65,509 17.2%
4 False 3 19,938 5.2%
5 True 3 15,423 4.1%
6 False 4 27,259 7.2%
7 True 4 41,807 11.0%
8 False 5 17,717 4.7%
9 True 5 22,005 5.8%
  • Highlighting areas where the rate of missing "Dispatch Date" values is higher than the rate of non-missing values.
dispatch_reported area count percent more_unreported
0 False 1 49,252 13.0% False
1 True 1 81,890 21.5% False
2 False 2 39,330 10.3% False
3 True 2 65,509 17.2% False
4 False 3 19,938 5.2% True
5 True 3 15,423 4.1% True
6 False 4 27,259 7.2% False
7 True 4 41,807 11.0% False
8 False 5 17,717 4.7% False
9 True 5 22,005 5.8% False
# Share of all events that each police area accounts for.
obsrate_byarea = data[['event_no', 'area',]].groupby('area')[[
    'event_no',]].apply(
    lambda grp: grp.event_no.nunique()/data.shape[0]
    ).reset_index().rename(columns={0: 'prop_events_reported'})
# Share of each area's events that have a dispatch date reported.
disprate_byarea = data[['event_no', 'area', 'dispatch_reported']
    ].groupby('area')[[
    'event_no', 'dispatch_reported']].apply(
    lambda grp: grp.dispatch_reported.sum()/grp.event_no.nunique()
    ).reset_index().rename(columns={0: 'prop_dispatch_reported'})

# Chi-square test of homogeneity: is the dispatch-reported rate the same in
# every police area? The test must be run on event COUNTS (area x
# dispatch_reported); running it on the per-area proportions themselves
# gives a meaningless statistic and a p-value near 1 whatever the data say.
cats = disprate_byarea.area.unique()
props = disprate_byarea.prop_dispatch_reported
sig = 0.05
observed = pd.crosstab(data.area, data.dispatch_reported)
obs_counts = observed.to_numpy()
total = int(obs_counts.sum())
# expected counts if every area shared the overall dispatch-reported rate
expected = np.outer(obs_counts.sum(axis=1), obs_counts.sum(axis=0)) / total
table_dof = (obs_counts.shape[0] - 1) * (obs_counts.shape[1] - 1)
# power_divergence uses k - 1 - ddof degrees of freedom (k = number of cells),
# so choose ddof to land on the contingency-table degrees of freedom
ddof = obs_counts.size - 1 - table_dof
# the effect size for a contingency table is scaled by min(rows, columns) - 1
effect_degf = min(obs_counts.shape) - 1
loglikely = power_divergence(
    obs_counts, f_exp=expected, ddof=ddof, axis=None, lambda_='log-likelihood')
chisquare = power_divergence(
    obs_counts, f_exp=expected, ddof=ddof, axis=None, lambda_='pearson')

base_info = f"""This test will compare whether the proportion of events with a reported dispatch date is the same \
in every police area. In interpreting the results, a p-value below {sig} will be considered statistically significant. \
In such cases where the p-value is statistically significant, the effect size will be measured and categorized as small, medium, or large.\n"""
null_phrase = "the proportion of events with a reported dispatch date is the same across police areas"
loglikely_info = text_statistic(
    loglikely,
    sig=sig, n=total, degf=effect_degf,
    null_phrase=null_phrase)
chisquare_info = text_statistic(
    chisquare,
    sig=sig, n=total, degf=effect_degf,
    null_phrase=null_phrase)

print(base_info)
print(chisquare_info)
This test will compare whether observed racial proportions match the proportion of each racial group in the general population. In interpreting the results, a p-value below 0.05 will be considered statistically significant. In such cases where the p-value is statistically significant, the effect size will be measured and categorized as small, medium, or large.

This test results in a p-value of 1.0, which is not statistically significant and fails to reject the null hypothesis that the distribution of _____ follows the distribution of _____. 

Summary

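  • The Chi-Square test results tell us that the police areas have a similar proportion of dispatch reported when compared to each other. Treat this conclusion with caution: a chi-square test is only meaningful when it is run on event counts rather than on the five per-area proportions, and the table above shows that Area 3 is the only area where events with a missing "Dispatch Date" (19,938) outnumber events with a reported one (15,423).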
  • Let's dig deeper and see if that is still true regardless of the type of source that reported the event (ShotSpotter alert or human caller).

Dispatch Date reporting by area and source type

  • Presented are the counts of observed non-missing "Dispatch Date" values by police area and the type of source which reported the emergency.
dispatch_reported event_type area count percent
0 False Human reporting gunfire 1 43,975 12.9%
1 False ShotSpotter alert 1 5,277 1.6%
2 True Human reporting gunfire 1 24,640 7.2%
3 True ShotSpotter alert 1 57,250 16.8%
4 False Human reporting gunfire 2 35,138 10.3%
5 False ShotSpotter alert 2 4,192 1.2%
6 True Human reporting gunfire 2 18,632 5.5%
7 True ShotSpotter alert 2 46,877 13.8%
8 False Human reporting gunfire 3 19,902 5.8%
9 False ShotSpotter alert 3 36 0.0%
10 True Human reporting gunfire 3 15,023 4.4%
11 True ShotSpotter alert 3 400 0.1%
12 False Human reporting gunfire 4 24,346 7.2%
13 False ShotSpotter alert 4 2,913 0.9%
14 True Human reporting gunfire 4 13,011 3.8%
15 True ShotSpotter alert 4 28,796 8.5%

Dispatch Date reporting by area and source type

  • Presented are the counts of observed non-missing "Dispatch Date" values by police area, source type, and whether ShotSpotter was the first to report the event (shotspotter_first).

Area 1

dispatch_reported shotspotter_first event_type count percent
0 False False Human reporting gunfire 43,975 33.5%
1 False False ShotSpotter alert 5,277 4.0%
2 True False Human reporting gunfire 24,617 18.8%
3 True False ShotSpotter alert 8,710 6.6%
4 True True Human reporting gunfire 23 0.0%
5 True True ShotSpotter alert 48,540 37.0%

Area 2

dispatch_reported shotspotter_first event_type count percent
0 False False Human reporting gunfire 35,138 33.5%
1 False False ShotSpotter alert 4,192 4.0%
2 True False Human reporting gunfire 18,623 17.8%
3 True False ShotSpotter alert 7,159 6.8%
4 True True Human reporting gunfire 9 0.0%
5 True True ShotSpotter alert 39,718 37.9%

Area 3

dispatch_reported shotspotter_first event_type count percent
0 False False Human reporting gunfire 19,902 56.3%
1 False False ShotSpotter alert 36 0.1%
2 True False Human reporting gunfire 15,023 42.5%
3 True False ShotSpotter alert 77 0.2%
4 True True ShotSpotter alert 323 0.9%

Area 4

dispatch_reported shotspotter_first event_type count percent
0 False False Human reporting gunfire 24,346 35.3%
1 False False ShotSpotter alert 2,913 4.2%
2 True False Human reporting gunfire 13,000 18.8%
3 True False ShotSpotter alert 3,709 5.4%
4 True True Human reporting gunfire 11 0.0%
5 True True ShotSpotter alert 25,087 36.3%

Area 5

dispatch_reported shotspotter_first event_type count percent
0 False False Human reporting gunfire 16,871 42.5%
1 False False ShotSpotter alert 846 2.1%
2 True False Human reporting gunfire 11,789 29.7%
3 True False ShotSpotter alert 762 1.9%
4 True True Human reporting gunfire 3 0.0%
5 True True ShotSpotter alert 9,451 23.8%

Dispatch Date reporting by area and source type

  • Presented are the counts of observed non-missing "Dispatch Date" values by police area, source type, and whether ShotSpotter was the first to report the event (shotspotter_first).

ShotSpotter alerts

dispatch_reported shotspotter_first area count percent
0 False False 1 5,277 3.4%
1 False False 2 4,192 2.7%
2 False False 3 36 0.0%
3 False False 4 2,913 1.9%
4 False False 5 846 0.5%
5 True False 1 8,710 5.6%
6 True False 2 7,159 4.6%
7 True False 3 77 0.0%
8 True False 4 3,709 2.4%
9 True False 5 762 0.5%
10 True True 1 48,540 31.0%
11 True True 2 39,718 25.3%
12 True True 3 323 0.2%
13 True True 4 25,087 16.0%
14 True True 5 9,451 6.0%

911 calls

dispatch_reported shotspotter_first area count percent
0 False False 1 43,975 19.7%
1 False False 2 35,138 15.7%
2 False False 3 19,902 8.9%
3 False False 4 24,346 10.9%
4 False False 5 16,871 7.6%
5 True False 1 24,617 11.0%
6 True False 2 18,623 8.3%
7 True False 3 15,023 6.7%
8 True False 4 13,000 5.8%
9 True False 5 11,789 5.3%
10 True True 1 23 0.0%
11 True True 2 9 0.0%
12 True True 4 11 0.0%
13 True True 5 3 0.0%

Summary

RE: SoundThinking / Brookings Institution claim that some 80% of gunfire events do not get reported by citizens - is that true in Chicago?

# ShotSpotter alerts with a reported disposition that is not flagged as a
# miscellaneous event. The explicit comparison with False leaves out rows
# where misc_event is missing.
notmisc = sst[sst.misc_event.eq(False) & sst.disposition_reported]

Disposition data

In the info page included with the data, the CPD FOIA officer informed us that they had internally identified emergency events from both sources, 911 callers and ShotSpotter, that referred to the same underlying gunfire event, and that the disposition field was only included in the responsive records when ShotSpotter was the first to report. Source

  • Of the 156,896 ShotSpotter alerts included in the analysis, 123,169 or 78.5% have a reported disposition.

ShotSpotter data, labeled duplicate

labeled_duplicate count percent
0 False 122,990 78.4%
1 True 33,906 21.6%

ShotSpotter data, disposition provided

disposition_reported count percent
0 True 123,169 78.5%
1 False 33,727 21.5%

ShotSpotter data, labeled duplicate and disposition provided

labeled_duplicate disposition_reported count percent
0 False True 122,696 78.2%
1 True False 33,433 21.3%
2 True True 473 0.3%
3 False False 294 0.2%

5 most frequently reported disposition values

Presented are the 5 most frequently reported `disposition` values for emergency events in which ShotSpotter was the first alert.
disposition count percent
0 MISC.INC./OTH POLICE SER 87,385 70.9%
1 MISC.INC./NO PERSON FND. 15,666 12.7%
2 WEAP VIO/DISC OF FIREA 5,433 4.4%
3 BATTERY:AGGR:HANDGUN 2,744 2.2%
4 ASSAULT;AGG HAND 1,314 1.1%
**ShotSpotter data, miscellaneous disposition**
misc_event count percent
0 True 104,550 84.9%
1 False 18,619 15.1%
  • Of the 123,169 emergency events about potential gunfire identified by CPD as first reported by ShotSpotter, 104,550 or 84.9% are labeled as a "Miscellaneous Incident."
  • 84.9% of ShotSpotter-original alerts are labeled as a "Miscellaneous Incident".

5 most frequently reported disposition values other than "Miscellaneous Incident"

Presented are the 5 most frequently reported `disposition` values other than "Miscellaneous Incident" for emergency events in which ShotSpotter was the first alert.
disposition count percent
0 WEAP VIO/DISC OF FIREA 5,433 29.2%
1 BATTERY:AGGR:HANDGUN 2,744 14.7%
2 ASSAULT;AGG HAND 1,314 7.1%
3 UNLAWFUL USE OF WEAPON O 1,299 7.0%
4 CRIMINAL DAMAGE TO PROP 1,042 5.6%

Statements and assumptions

Underlying events

  • The real-world event involving gunfire that prompted a true positive report to OEMC and/or CPD will be referred to as the "underlying emergency event". These emergency records have not been de-duplicated with respect to the singular real-world event being reported, and it's possible that multiple sources provided a report for any given event.
  • In producing a single ShotSpotter alert, SoundThinking has described their service as triangulating detections from multiple sensors/microphones to pinpoint the location and produce an alert to send to law enforcement and emergency services. To that point, we might expect that no two ShotSpotter alerts are co-referent for a single event, i.e., ShotSpotter does not send additional alerts if law enforcement has not yet arrived on scene. However, the same would not be true for 911 callers, for whom there is reporting describing repeat calls to 911 after time passed without any officers arriving on scene to address the emergency.

Linking reports of the same event

  • Recall that per CPD, disposition was only reported when ShotSpotter was the first to alert OEMC and/or CPD to the emergency. If ShotSpotter was not the first to alert OEMC and/or CPD to an underlying emergency, then I assume that the first alert came from a human (in other words and most often, from a civilian 911 call). Additionally, regardless of whether or not the alert came first, CPD labeled all ShotSpotter alerts indicating whether or not the alert was understood to be a duplicate for another report.
    • If ShotSpotter was not the first to alert OEMC and/or CPD to a potential gunfire event, then the ShotSpotter alert is co-referant to at least one 911 call for the same underlying emergency event.
  • Two emergency event records will be considered "co-referant" if:
    1. the date_occurred timestamps are within +/- 10 minutes of each other, and
    2. the location values are at least 95% similar in terms of string distance. Note that this method has limitations and will likely only identify events on streets of the same name as co-referant, rather than linking events that are geographically neighbors of each other.

Interpreting record linkage by case

  • Case 1: ShotSpotter alerts with no co-referent 911 calls
    • labeled_duplicate should be False
    • We should have disposition reported for all of these events in the data.
    • When disposition refers to a "Miscellaneous Incident", we will consider the alert to be a false positive gunfire alert, and all other disposition categories will be assumed true positive gunfire alert.
    • The proportion of alerts that have no co-referant 911 calls and a disposition other than "Miscellaneous Incident" will be interpreted as the proportion of gunfire events captured by ShotSpotter that would have otherwise gone unreported to OEMC and/or CPD.
      • This proportion will be compared to 1) the Brookings Institute proposed statistic of 80% of gunfire events and 2) the proportion of false positive ShotSpotter gunfire alerts.
  • Case 2: ShotSpotter alerts with co-referent 911 calls
    • labeled_duplicate may or may not be False
    • We should have disposition reported for those events which ShotSpotter alerted to first, but only those.
    • We have no disposition data to assess the proportion of 911 calls that are false positive gunfire reports. However, if a ShotSpotter alert is co-referant with a 911 call, then either 1) both reports are true positives or 2) both are false positives, so the proportion is irrelevant.
  • Case 3: 911 calls with no co-referent ShotSpotter alerts
    • labeled_duplicate will not be reported for the vast majority of these events
    • No disposition data reported for these events to assess the proportion of 911 calls that are false positive gunfire reports, but the true positives would be interpreted as the proportion of gunfire events not captured by ShotSpotter that would have otherwise gone unreported to OEMC and/or CPD.

Tests

If the Brookings Institute's proposed statistic is reasonable to apply in Chicago, we expect that no more than 20% of underlying gunfire reports in the data are one of the following:

  • Subset of Case 2: a 911 call which occurs prior to a co-referent ShotSpotter alert
  • Case 3: a 911 call with no co-referent ShotSpotter alert

Alternatively, we expect that the proportion of ShotSpotter alerts that have no co-referant 911 calls and a disposition other than "Miscellaneous Incident" will be some 80% of underlying gunfire reports.

# ShotSpotter alerts that CPD labeled as not a duplicate report
sst_notdup = sst[sst.labeled_duplicate.eq(False)]
# boolean mask: ShotSpotter alerts with no cluster value
sst_nocands = sst.cluster.isna()
# most frequent combinations of the alert / co-reference / miscellaneous flags
sst_nocands_vc = report_fields(
    df=sst, idcol='event_no',
    cols=['shotspotter_alert', 'any_coref', 'misc_event'], headn=5)
  • The data we obtained from CPD only covered ShotSpotter alerts, so we only have labels from CPD indicating duplicate reports for those events for which ShotSpotter produced at least one alert.

Emergency events by reporting source and whether CPD labeled the event as a duplicate report

event_type labeled_duplicate misc_event count percent
0 ShotSpotter alert False True 104,251 84.6%
1 ShotSpotter alert False False 18,445 15.0%
2 ShotSpotter alert True True 299 0.2%
3 ShotSpotter alert True False 174 0.1%
4 Human reporting gunfire False True 35 0.0%

ShotSpotter alerts labeled as NOT a duplicate event, by multiple factors

event_type shotspotter_first any_coref misc_event count percent
0 ShotSpotter alert True False True 102,662 83.7%
1 ShotSpotter alert True False False 16,991 13.8%
2 ShotSpotter alert True True True 1,589 1.3%
3 ShotSpotter alert True True False 1,454 1.2%
  • 17,150 or 13.9% of the emergency events analyzed were ShotSpotter alerts that have no identified co-referant 911 calls and a disposition referring to something other than "Miscellaneous Incident."
# tally every observed combination of the three flags; missing values are
# labeled 'no data' so they are counted instead of dropped
sst.loc[:, ['shotspotter_first', 'disposition_reported', 'misc_event']].fillna(
    'no data').value_counts()
shotspotter_first  disposition_reported  misc_event
True               True                  True          104550
False              False                 no data        33727
True               True                  False          18619
Name: count, dtype: int64
import plotly.graph_objects as go

# Node labels; the position in this list is the node number used by the links.
node_labels = [
    "Shotspotter alerts",
    "Labeled first report",
    "Not labeled first report (no disposition data)",
    "Labeled miscellaneous incident",
    "Not labeled miscellaneous incident",
]

# Masks over the ShotSpotter alerts. Explicit comparisons keep rows with a
# missing flag out of both the True and the False side.
is_first = sst.shotspotter_first == True
is_not_first = sst.shotspotter_first == False
is_misc = sst.misc_event == True
is_not_misc = sst.misc_event == False

# One entry per link: (source node, target node, number of alerts).
flows = [
    (0, 1, is_first.sum()),
    (0, 2, is_not_first.sum()),
    (1, 3, (is_first & is_misc).sum()),
    (2, 3, (is_not_first & is_misc).sum()),
    (1, 4, (is_first & is_not_misc).sum()),
    (2, 4, (is_not_first & is_not_misc).sum()),
]
link_sources, link_targets, link_values = (list(part) for part in zip(*flows))

sankey = go.Sankey(
    node=dict(
        pad=15,
        thickness=20,
        line=dict(color="black", width=0.5),
        label=node_labels,
        color=["#00cc7a", "#00ccad", "green", "#00b8cc", "#00cc7a"],
    ),
    link=dict(
        source=link_sources,
        target=link_targets,
        value=link_values,
        hovercolor=["#00cc7a", "#00ccad", "lightblue", "mediumturquoise", "lightgreen", "cyan"],
    ),
)
fig = go.Figure(data=[sankey])

fig.update_layout(title_text="Labeling of ShotSpotter alerts", font_size=10)
fig.show()
 

Summary